* Replication script: produces the statistics reported in Tables 1-3.

* Start from an empty session. clear all already discards matrices and Mata
* objects, so no separate matrix / mata clears are required.
clear all

* Never pause the output at a --more-- prompt.
set more off

* Working directory; every dataset below is opened with a relative file name.
* Replace the placeholder with the folder that contains the .dta files.
local path "...folder name"
cd "`path'"

/*sintering: Table 1 Panel A*/
* Summarises size, overall and by owner, first over all retained
* observations and then over one row per plant.
use sinter_plant_pf.dta, clear

keep id firm plant time year production number size owner region labor

* Merge key: plant identifier followed by the period written as text.
gen fid = plant + string(time)

**trim outliers
* Missing values compare greater than any number, so the upper bound also
* removes rows whose labor is missing.
drop if labor < 1 | labor > 10000

* Attach the TFP estimates and keep matched rows only.
*  - using-only rows are discarded, so rows trimmed above cannot re-enter the
*    sample through the link file;
*  - master-only rows have no TFP and were previously removed only because a
*    missing logtfp compares greater than the cut-off; they are now dropped
*    here, explicitly.
merge 1:1 fid using sinter_tfp_lmk_link.dta, keep(match) nogenerate

gen logtfp = log(tfp_sinter)
* log() is missing for non-positive or missing TFP; such rows are dropped
* together with the upper-tail outliers (same sample as logtfp > 5 alone).
drop if logtfp > 5 | missing(logtfp)

sum size
bysort owner: sum size /*min and max come from all observations*/

***panel number
* Collapse to one row per plant holding the plant's mean size.
bysort plant: egen size1 = mean(size)
* Keep the earliest period of each plant. Ordering by time makes the retained
* row, and therefore the owner value used below, reproducible across runs;
* a plain by-plant sort orders rows within a plant arbitrarily.
bysort plant (time): keep if _n == 1
drop size
rename size1 size

tab owner

***size summary
sum size
bysort owner: sum size

/*pig-iron making: Table 1 Panel B*/
* Summarises size, overall and by owner, first over all retained
* observations and then over one row per plant.
use iron_plant_pf.dta, clear

keep plant firm time year month production number size owner region labor

* Merge key: plant identifier followed by the period written as text.
gen fid = plant + string(time)

**trim outliers
* Only a lower bound is applied here, so rows with missing labor are kept.
drop if labor < 1

* Attach the TFP estimates and keep matched rows only.
*  - using-only rows are discarded, so rows trimmed above cannot re-enter the
*    sample through the link file;
*  - master-only rows have no TFP and were previously removed only because a
*    missing logtfp compares greater than the cut-off; they are now dropped
*    here, explicitly.
merge 1:1 fid using iron_tfp_lmk_link.dta, keep(match) nogenerate

gen logtfp = log(tfp_iron)
* Two-sided trim at +/-4. log() is missing for non-positive or missing TFP;
* those rows are dropped as well (same sample as the two inequalities alone,
* because missing compares greater than 4).
drop if logtfp > 4 | logtfp < -4 | missing(logtfp)

sum size
bysort owner: sum size /*min and max come from all observations*/

***panel number
* Collapse to one row per plant holding the plant's mean size.
bysort plant: egen size1 = mean(size)
* Keep the earliest period of each plant. Ordering by time makes the retained
* row, and therefore the owner value used below, reproducible across runs;
* a plain by-plant sort orders rows within a plant arbitrarily.
bysort plant (time): keep if _n == 1
drop size
rename size1 size

tab owner

***size summary
sum size
bysort owner: sum size

/*steel making: Table 1 Panel C*/
* Summarises size, overall and by owner, first over all retained
* observations and then over one row per plant.
use steel_plant_pf.dta, clear

keep plant firm time production number size owner region labor

* Merge key: plant identifier followed by the period written as text.
gen fid = plant + string(time)

*trim outliers
* Missing values compare greater than any number, so the upper bound also
* removes rows whose labor is missing.
drop if labor < 1 | labor > 500000

* Attach the TFP estimates and keep matched rows only.
*  - using-only rows are discarded, so rows trimmed above cannot re-enter the
*    sample through the link file;
*  - master-only rows have no TFP and were previously removed only because a
*    missing logtfp compares greater than the cut-off; they are now dropped
*    here, explicitly.
merge 1:1 fid using steel_tfp_lmk_link.dta, keep(match) nogenerate

gen logtfp = log(tfp_steel)
* Trim the tails; note the lower bound is inclusive (<= -2) while the upper
* bound is strict. Rows with missing logtfp are dropped too (same sample as
* the two inequalities alone, because missing compares greater than 2).
drop if logtfp > 2 | logtfp <= -2 | missing(logtfp)

sum size
bysort owner: sum size /*min and max come from all observations*/

***panel number
* Collapse to one row per plant holding the plant's mean size.
bysort plant: egen size1 = mean(size)
* Keep the earliest period of each plant. Ordering by time makes the retained
* row, and therefore the owner value used below, reproducible across runs;
* a plain by-plant sort orders rows within a plant arbitrarily.
bysort plant (time): keep if _n == 1

drop size
rename size1 size
tab owner

***size summary
sum size
bysort owner: sum size

/*Integrated Facility*/
* Load the aggregated three-stage dataset used for the remaining tables.
use 3stage_tfp_link_aggregate.dta, clear

* Sample restriction: a plant identifier must be present, and the plant count
* of every stage (sinter, iron, steel) must be neither zero nor missing.
drop if plant == ""
drop if nb_sinter == 0 | nb_iron == 0 | nb_steel == 0
drop if nb_sinter == . | nb_iron == . | nb_steel == .

***generate size quartiles
* Store the 10th/25th/50th/75th/90th percentiles of size_steel as constant
* variables size10 ... size90.
foreach p of numlist 10 25 50 75 90 {
    egen size`p' = pctile(size_steel), p(`p')
}

/*Table 3*/
* size_steel bins shared by every breakdown in this table. The top bin
* excludes missing size_steel explicitly: missing compares greater than any
* number, so a bare "size_steel >= 300" would also select rows with no size.
local bin1 "size_steel < 90"
local bin2 "size_steel >= 90 & size_steel < 160"
local bin3 "size_steel >= 160 & size_steel < 300"
local bin4 "size_steel >= 300 & !missing(size_steel)"

***number of plants by stage
* Full sample first, then each size bin; always overall and by owner.
sum nb_sinter nb_iron nb_steel
bysort owner: sum nb_sinter nb_iron nb_steel

forvalues b = 1/4 {
    sum nb_sinter nb_iron nb_steel if `bin`b''
    bysort owner: sum nb_sinter nb_iron nb_steel if `bin`b''
}

*add average size
* Average size per plant at each stage = stage size / number of plants.
* NOTE(review): the sinter numerator is size_sinter_steel whereas the other
* stages use size_iron and size_steel - confirm this is the intended variable.
gen ave_sinter = size_sinter_steel/nb_sinter
gen ave_iron = size_iron/nb_iron
gen ave_steel = size_steel/nb_steel

sum ave_sinter ave_iron ave_steel
bysort owner: sum ave_sinter ave_iron ave_steel

forvalues b = 1/4 {
    sum ave_sinter ave_iron ave_steel if `bin`b''
    bysort owner: sum ave_sinter ave_iron ave_steel if `bin`b''
}


/*generate Table 2 Panel B*/

*add steel production share by each size quartile
* total() is the documented name of the egen function formerly called sum().
* pdt       : steel production summed over the whole sample
* pdt_owner : steel production summed within owner
egen pdt = total(pdt_steel)
bysort owner: egen pdt_owner = total(pdt_steel)
gen share_owner = pdt_owner/pdt

* Owner-level production within each size_steel bin. Each variable is filled
* only on rows inside its bin and is missing elsewhere. The top bin excludes
* missing size_steel explicitly, because missing compares greater than any
* number and would otherwise be counted as size_steel >= 300.
bysort owner: egen pdt25_owner = total(pdt_steel) if size_steel < 90
bysort owner: egen pdt50_owner = total(pdt_steel) if size_steel >= 90 & size_steel < 160
bysort owner: egen pdt75_owner = total(pdt_steel) if size_steel >= 160 & size_steel < 300
bysort owner: egen pdt100_owner = total(pdt_steel) if size_steel >= 300 & !missing(size_steel)

* Shares are expressed relative to total steel production (pdt).
foreach q in 25 50 75 100 {
    gen share`q'_owner = pdt`q'_owner/pdt
}

* Each share is constant within owner (and bin), so the reported mean is the
* share itself.
bysort owner: sum share_owner
foreach q in 25 50 75 100 {
    bysort owner: sum share`q'_owner
}

/*Table 1 Panel D*/
sum size_steel
bysort owner: sum size_steel /*min and max come from all observations*/

***panel number
* Collapse to one row per firm_iron_steel carrying the group's mean size_steel.
bysort firm_iron_steel: egen size1 = mean(size_steel)

* Keep a single row per group.
* NOTE(review): rows within a group are in arbitrary order, so if owner can
* differ across rows of the same firm_iron_steel the retained owner is not
* reproducible - confirm owner is constant within group.
bysort firm_iron_steel: keep if _n == 1

drop size_steel
rename size1 size_steel

tab owner

sum size_steel
bysort owner: sum size_steel

/*Table 2 Panel A*/
* Number of units by owner within each size bin; size_steel is now the
* group mean computed above.
tab owner if size_steel < 90

tab owner if size_steel >= 90 & size_steel < 160

tab owner if size_steel >= 160 & size_steel < 300

* Missing compares greater than any number, so missing size_steel is excluded
* explicitly to keep such rows out of the top bin.
tab owner if size_steel >= 300 & !missing(size_steel)
